{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Rename and Replace " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd " ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# read a dataset of UFO reports into a DataFrame\n", "ufo = pd.read_csv('http://bit.ly/uforeports')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors ReportedShape ReportedStateTime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " City Colors Reported Shape Reported State Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# examine the first 5 rows \n", "ufo.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# examine the column names \n", "ufo.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Rename " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# rename two of the columns by using the `rename` method\n", "ufo.rename(columns={'Colors Reported': 'Colors_Reported', 'Shape Reported': 'Shape_Reported'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CityColors_ReportedShape_ReportedStateTime
0IthacaNaNTRIANGLENY6/1/1930 22:00
1WillingboroNaNOTHERNJ6/30/1930 20:00
2HolyokeNaNOVALCO2/15/1931 14:00
3AbileneNaNDISKKS6/1/1931 13:00
4New York Worlds FairNaNLIGHTNY4/18/1933 19:00
\n", "
" ], "text/plain": [ " City Colors_Reported Shape_Reported State Time\n", "0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "1 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "2 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "3 Abilene NaN DISK KS 6/1/1931 13:00\n", "4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ufo.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['city', 'colors reported', 'shape reported', 'state', 'time'], dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# replace all of the column names by overwriting the 'columns' attribute \n", "ufo_cols = ['city', 'colors reported', 'shape reported', 'state', 'time']\n", "ufo.columns = ufo_cols\n", "# see modified columns \n", "ufo.columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# replace the column names during the file reading process by using the 'names' parameter\n", "# header=0 marks the file's first row as the existing header, so it is replaced by `names`\n", "# instead of being read in as a data row\n", "ufo = pd.read_csv('http://bit.ly/uforeports', header=0, names=ufo_cols)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
citycolors reportedshape reportedstatetime
0CityColors ReportedShape ReportedStateTime
1IthacaNaNTRIANGLENY6/1/1930 22:00
2WillingboroNaNOTHERNJ6/30/1930 20:00
3HolyokeNaNOVALCO2/15/1931 14:00
4AbileneNaNDISKKS6/1/1931 13:00
\n", "
" ], "text/plain": [ " city colors reported shape reported state time\n", "0 City Colors Reported Shape Reported State Time\n", "1 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "2 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "3 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "4 Abilene NaN DISK KS 6/1/1931 13:00" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# examine the 5 rows \n", "ufo.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['city', 'colors_reported', 'shape_reported', 'state', 'time'], dtype='object')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# replace all spaces with underscores in the column names by using the 'str.replace' method\n", "ufo.columns = ufo.columns.str.replace(' ', '_')\n", "ufo.columns" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
citycolors_reportedshape_reportedstatetime
0CityColors ReportedShape ReportedStateTime
1IthacaNaNTRIANGLENY6/1/1930 22:00
2WillingboroNaNOTHERNJ6/30/1930 20:00
3HolyokeNaNOVALCO2/15/1931 14:00
4AbileneNaNDISKKS6/1/1931 13:00
\n", "
" ], "text/plain": [ " city colors_reported shape_reported state time\n", "0 City Colors Reported Shape Reported State Time\n", "1 Ithaca NaN TRIANGLE NY 6/1/1930 22:00\n", "2 Willingboro NaN OTHER NJ 6/30/1930 20:00\n", "3 Holyoke NaN OVAL CO 2/15/1931 14:00\n", "4 Abilene NaN DISK KS 6/1/1931 13:00" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# let's look at DataFrame\n", "ufo.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Replace" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# read another dataset \n", "fm = pd.read_csv(\"../data/framingham.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
maleageeducationcurrentSmokercigsPerDayBPMedsprevalentStrokeprevalentHypdiabetestotCholsysBPdiaBPBMIheartRateglucoseTenYearCHD
01394.000.00.0000195.0106.070.026.9780.077.00
10462.000.00.0000250.0121.081.028.7395.076.00
21481.0120.00.0000245.0127.580.025.3475.070.00
30613.0130.00.0010225.0150.095.028.5865.0103.01
40463.0123.00.0000285.0130.084.023.1085.085.00
\n", "
" ], "text/plain": [ " male age education currentSmoker cigsPerDay BPMeds prevalentStroke \\\n", "0 1 39 4.0 0 0.0 0.0 0 \n", "1 0 46 2.0 0 0.0 0.0 0 \n", "2 1 48 1.0 1 20.0 0.0 0 \n", "3 0 61 3.0 1 30.0 0.0 0 \n", "4 0 46 3.0 1 23.0 0.0 0 \n", "\n", " prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose \\\n", "0 0 0 195.0 106.0 70.0 26.97 80.0 77.0 \n", "1 0 0 250.0 121.0 81.0 28.73 95.0 76.0 \n", "2 0 0 245.0 127.5 80.0 25.34 75.0 70.0 \n", "3 1 0 225.0 150.0 95.0 28.58 65.0 103.0 \n", "4 0 0 285.0 130.0 84.0 23.10 85.0 85.0 \n", "\n", " TenYearCHD \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# examine first few rows \n", "fm.head() " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# first rename `male` to `sex` \n", "fm.rename(columns={\"male\": \"sex\"}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexageeducationcurrentSmokercigsPerDayBPMedsprevalentStrokeprevalentHypdiabetestotCholsysBPdiaBPBMIheartRateglucoseTenYearCHD
01394.000.00.0000195.0106.070.026.9780.077.00
10462.000.00.0000250.0121.081.028.7395.076.00
21481.0120.00.0000245.0127.580.025.3475.070.00
30613.0130.00.0010225.0150.095.028.5865.0103.01
40463.0123.00.0000285.0130.084.023.1085.085.00
\n", "
" ], "text/plain": [ " sex age education currentSmoker cigsPerDay BPMeds prevalentStroke \\\n", "0 1 39 4.0 0 0.0 0.0 0 \n", "1 0 46 2.0 0 0.0 0.0 0 \n", "2 1 48 1.0 1 20.0 0.0 0 \n", "3 0 61 3.0 1 30.0 0.0 0 \n", "4 0 46 3.0 1 23.0 0.0 0 \n", "\n", " prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose \\\n", "0 0 0 195.0 106.0 70.0 26.97 80.0 77.0 \n", "1 0 0 250.0 121.0 81.0 28.73 95.0 76.0 \n", "2 0 0 245.0 127.5 80.0 25.34 75.0 70.0 \n", "3 1 0 225.0 150.0 95.0 28.58 65.0 103.0 \n", "4 0 0 285.0 130.0 84.0 23.10 85.0 85.0 \n", "\n", " TenYearCHD \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now take a look at dataset \n", "fm.head() " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Replace Value for Better Understanding of Dataset\n", "__sex__\n", "* 1 = Male \n", "* 0 = Female \n", "\n", "__diabetes__\n", "* 1 = Yes \n", "* 0 = No " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# replace sex column values (1 -> male, 0 -> female)\n", "# assign the result back: `replace(..., inplace=True)` on a selected column is chained assignment,\n", "# which pandas deprecates and which does not update `fm` under Copy-on-Write\n", "fm['sex'] = fm['sex'].replace({1: \"male\", 0: \"female\"})" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# replace diabetes column values (1 -> yes, 0 -> no)\n", "# assign the result back: `replace(..., inplace=True)` on a selected column is chained assignment,\n", "# which pandas deprecates and which does not update `fm` under Copy-on-Write\n", "fm['diabetes'] = fm['diabetes'].replace({1: \"yes\", 0: \"no\"})" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexageeducationcurrentSmokercigsPerDayBPMedsprevalentStrokeprevalentHypdiabetestotCholsysBPdiaBPBMIheartRateglucoseTenYearCHD
0male394.000.00.000no195.0106.070.026.9780.077.00
1female462.000.00.000no250.0121.081.028.7395.076.00
2male481.0120.00.000no245.0127.580.025.3475.070.00
3female613.0130.00.001no225.0150.095.028.5865.0103.01
4female463.0123.00.000no285.0130.084.023.1085.085.00
\n", "
" ], "text/plain": [ " sex age education currentSmoker cigsPerDay BPMeds prevalentStroke \\\n", "0 male 39 4.0 0 0.0 0.0 0 \n", "1 female 46 2.0 0 0.0 0.0 0 \n", "2 male 48 1.0 1 20.0 0.0 0 \n", "3 female 61 3.0 1 30.0 0.0 0 \n", "4 female 46 3.0 1 23.0 0.0 0 \n", "\n", " prevalentHyp diabetes totChol sysBP diaBP BMI heartRate glucose \\\n", "0 0 no 195.0 106.0 70.0 26.97 80.0 77.0 \n", "1 0 no 250.0 121.0 81.0 28.73 95.0 76.0 \n", "2 0 no 245.0 127.5 80.0 25.34 75.0 70.0 \n", "3 1 no 225.0 150.0 95.0 28.58 65.0 103.0 \n", "4 0 no 285.0 130.0 84.0 23.10 85.0 85.0 \n", "\n", " TenYearCHD \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 0 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Examine dataset\n", "fm.head() " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }